In [132]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
%matplotlib inline
In [133]:
# Load the 2014 stolen-bike records from a CSV given by relative path.
data = pd.read_csv("Stolen_Bikes-2014.csv")
# Keep an independent deep copy: a later cell overwrites data['DATE OCC'],
# while totalData is reused unchanged for the per-area and per-month analyses.
totalData = data.copy(deep=True)
# Preview the first rows of the loaded frame.
data.head()
Out[133]:
In [134]:
# Number of rows in the loaded frame.
len(data)
Out[134]:
In [135]:
# Show the distinct values of every column except the first one.
# Iterating data.columns[1:] yields the same labels as iterating the sliced
# frame, without building an intermediate DataFrame.
for column in data.columns[1:]:
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(np.unique(data[column]))
I have no idea what "DR NO" means, but let's continue.
In [136]:
# np.unique(..., return_counts=True) returns a (sorted unique values, counts) pair,
# so count_by_area[0] holds the area names and count_by_area[1] the row count for each.
count_by_area = np.unique(data['AREA NAME'],return_counts=True)
# Display the pair.
count_by_area
Out[136]:
In [137]:
# Bar chart of the number of records per area.
# count_by_area is the (area names, counts) pair produced by np.unique above.
area_names, area_counts = count_by_area
bar_positions = range(len(area_names))  # one bar slot per area

plt.figure(figsize=(15,5))
plt.bar(bar_positions, area_counts)
plt.xticks(bar_positions, area_names, rotation=70)
# Title and y-label added so this chart stands alone like the other plots in the notebook.
plt.title("Bikes stolen by area for 2014")
plt.ylabel("# of bikes stolen")
plt.grid()
plt.show()
In [138]:
# Tally records per distinct 'DATE OCC' value: element [0] is the sorted
# unique values, element [1] the matching counts.
count_by_date = np.unique(data['DATE OCC'],return_counts=True)
# Peek at the first five values and their counts.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(count_by_date[0][0:5])
print(count_by_date[1][0:5])
In [139]:
def whichDay(dt):
    """Return the weekday index of a 'month/day/year' date string.

    Parameters
    ----------
    dt : str
        Date written as three '/'-separated integers in month/day/year
        order, e.g. '1/6/2014'.

    Returns
    -------
    int
        0 for Monday through 6 for Sunday.

    Raises
    ------
    ValueError
        If ``dt`` does not split into exactly three integers or they do not
        form a valid calendar date.
    """
    month, day, year = [int(x) for x in dt.split('/')]
    # The notebook imports `date` (not `datetime`) from the datetime module,
    # so build the value with `date`; its weekday() has Monday == 0, Sunday == 6.
    return date(year, month, day).weekday()
# Replace the date strings in data['DATE OCC'] with weekday indices (0=Monday .. 6=Sunday).
# The strings are read from totalData, the untouched copy made right after loading,
# so re-running this cell does not try to re-parse already-converted integers.
data['DATE OCC'] = totalData['DATE OCC'].apply(whichDay)
# Preview the converted column.
data.head()
Out[139]:
In [140]:
# data['DATE OCC'] was overwritten with weekday indices by the whichDay cell above,
# so this tallies records per weekday: element [0] is the weekday indices present,
# element [1] the matching counts.
count_by_day_of_week = np.unique(data['DATE OCC'],return_counts=True)
# Display the pair.
count_by_day_of_week
Out[140]:
In [141]:
# Line chart of total thefts per weekday; x positions 0..6 are relabelled Monday..Sunday.
plt.plot(
    count_by_day_of_week[0],
    count_by_day_of_week[1],
    marker='x',
)
plt.ylim([0, 200])  # fixed y-range starting at zero
plt.title("Bikes stolen by day of the week for 2014")
plt.ylabel("# of bikes stolen")
plt.xticks(
    range(7),
    ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    rotation=70,
)
plt.grid()
plt.show()
In [142]:
# One weekday-count line per area, all on the same axes for comparison.
plt.figure(figsize=(15,6))
cityNames = np.unique(totalData['AREA NAME'])
for cityName in cityNames:  # for each city
    # Weekday index (0=Monday .. 6=Sunday) of every record in this city, parsed
    # from the untouched date strings in totalData. Selecting only the needed
    # column avoids deep-copying the whole frame on every iteration and avoids
    # assigning into a filtered slice.
    cityWeekdays = totalData.loc[totalData['AREA NAME'] == cityName, 'DATE OCC'].apply(whichDay)
    # Loop-local names keep the overall count_by_day_of_week computed earlier
    # from being overwritten with the last city's counts.
    cityDays, cityCounts = np.unique(cityWeekdays, return_counts=True)
    plt.plot(cityDays, cityCounts, label=cityName)
plt.xticks(range(0,7),['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'],rotation=70)
plt.title("Bikes stolen by day of the week for each city for 2014")
plt.ylabel("# of bikes stolen")
# Anchor the legend outside the axes so it does not cover the lines.
plt.legend(bbox_to_anchor=(1.2, 1.05))
plt.grid()
plt.show()
This plot is SUPER messy with one line per area, but hopefully someone can expand on it.
In [143]:
# totalData is the copy taken right after loading, so 'DATE OCC' here still holds the original date strings.
totalData.head()
Out[143]:
In [144]:
def whichMonth(dt):
    """Return the month number from a 'month/day/year' date string.

    Parameters
    ----------
    dt : str
        Date written as three '/'-separated integers in month/day/year
        order, e.g. '3/15/2014'.

    Returns
    -------
    int
        The first field of ``dt`` as an integer.

    Raises
    ------
    ValueError
        If ``dt`` does not split into exactly three integers.
    """
    # All three fields are still parsed so malformed strings are rejected;
    # the leading underscore marks the two values that are not used.
    month, _day, _year = [int(x) for x in dt.split('/')]
    return month
# Work on a separate deep copy so totalData keeps its original date strings.
monthData = totalData.copy(deep=True)
# Replace each date string with its month number, as returned by whichMonth.
monthData['DATE OCC'] = monthData['DATE OCC'].apply(whichMonth)
# Preview the converted column.
monthData.head()
Out[144]:
In [145]:
# Tally records per month: element [0] is the sorted month numbers present,
# element [1] the matching counts.
count_by_month = np.unique(monthData['DATE OCC'],return_counts=True)
# Display the pair.
count_by_month
Out[145]:
In [147]:
# Line chart of total thefts per month; x positions 1..12 are relabelled January..December.
plt.plot(
    count_by_month[0],
    count_by_month[1],
    marker='x',
)
plt.ylim([0, 120])  # fixed y-range starting at zero
plt.grid()
plt.title("Bikes stolen by month for the year of 2014")
plt.ylabel("# of bikes stolen")
plt.xticks(
    range(1, 13),
    [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    ],
    rotation=70,
)
plt.show()
In [ ]: